home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
AmigActive 10
/
AACD 10.iso
/
AACD
/
Utilities
/
GOCR
/
src
/
pgm2asc.cc
< prev
next >
Wrap
C/C++ Source or Header
|
2000-05-29
|
64KB
|
1,642 lines
// Release string; help() prints it as part of the usage banner.
#define Version "v0.2.4a4 2000/05/29" // update
/*
This is an Optical-Character-Recognition program
Copyright (C) 2000 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Joerg.Schulenburg@physik.uni-magdeburg.de
Sometimes I have written comments in German, sorry for that
- look for ??? for preliminary code
- space: avX=22 11-13
avX=16 5-7
avX= 7 5-6
ToDo: - add filter (r/s mismatch) g300c1
- write parallelizable code!
- learnmode (optimize filter)
- use ispell for final control or if unsure
- better line scanning (if not even)
- step 5: same chars differ? => expert mode
GLOBAL DATA (mostly structures)
- pix : image - one byte per pixel bits0-2=working
- lines : rows of the text (points to pix)
- box : list of bounding box for character
- obj : objects (lines, splines, etc. building a character)
*/
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
// #include <malloc.h> // wegen Probleme mit delete ???
#include "pgm2asc.h"
#include "pcx.h"
/* ocr1 is the test-engine */
#include "ocr1.h"
/* first engine */
#include "ocr0.h"
#define MaxBox (100*200) // largest possible letter (buffersize)
#define Uint unsigned int // shorthand type name
#define Uchar unsigned char // shorthand type name
// maximum of two values; each argument may be evaluated twice
#define mmax(x,y) (((x)>(y))?(x):(y))
// ostream& operator << ( ostream &os, int &i ){
// os << " " << setprecision(4) << i; return os; }
// ---------------------------------------------------------------------
// list of letter boxes (root+end or end==root)
// ---------------------------------------------------------------------
// box1 is the root of the doubly linked list of letter boxes (box_del and
// box_ins_before keep it up to date); boxd is the root of the database list
// that load_db appends to.
struct box *box1=(struct box*)NULL,*box2, // root, temp
*boxd=(struct box*)NULL; // database
struct environment env; // shared settings; this file reads env.cs and env.p
// = {cs:160,avX:5,avY:8,p:NULL}; // some compilers do not like that
struct tlines lines; // table of text lines, filled by detect_lines1
int vvv=0; // verbose (bit flags, see help())
int n_run=0; // num of run, if run_2 critical pattern get other results
// used for 2nd try, pixel uses slower filter function etc.
// Return the 1-based position of the first occurrence of c within the first
// 100 characters of the NUL-terminated string s, or 0 if it is not found.
int in_str(char c, char *s){
  int pos=0;
  while( pos<100 && s[pos]!=0 ){
    if( s[pos]==c ) return pos+1;
    pos++;
  }
  return 0;
}
// Ordering predicate for sorting letters by position on the image.
// Returns 1 if box1 comes after box2 and 0 otherwise: first by text line,
// then by horizontal position (overlapping boxes are ordered by left edge).
// ToDo: - use function same line like this or include lines.m1 etc.
int box_gt(struct box *box1,struct box *box2){ // box1 after box2 ?
  if( box1->line != box2->line )
    return ( box1->line > box2->line ) ? 1 : 0;
  if( box1->x0 > box2->x1 ) return 1; // entirely right of box2
  if( box1->x1 < box2->x0 ) return 0; // entirely left of box2
  return ( box1->x0 > box2->x0 ) ? 1 : 0; // overlapping: compare left edges
}
// Unlink box2 from the global list rooted at box1.
// The box itself is not freed; deleting it is the caller's job.
void box_del(struct box *box2){
  struct box *before=box2->pre;
  struct box *after =box2->next;
  if( before ) before->next=after;
  if( after  ) after->pre=before;
  if( box1==box2 ) box1=after; // removed the root: successor becomes root
}
// Insert box3 directly before box2 in the global list rooted at box1.
// If the list is empty, box3 becomes its only element and box2 is ignored.
void box_ins_before(struct box *box2,struct box *box3){
  if( box1==NULL ){
    box3->pre =(struct box*)NULL;
    box3->next=(struct box*)NULL;
    box1=box3;
    return;
  }
  box3->pre =box2->pre;
  box3->next=box2;
  box2->pre =box3;
  if( box3->pre ){
    // the former predecessor must still point at box2
    assert(box3->pre->next==box2);
    box3->pre->next=box3;
  }
  if( box1==box2 ) box1=box3; // inserted in front of the root
}
// Append box3 to the end of the list whose root pointer is *box1.
// box3 must be non-NULL; an empty list gets box3 as its root.
void box_app(struct box **box1,struct box *box3){
  assert(box3); // must be a non NULL pointer from new()
  if( *box1==NULL ){
    box3->pre =(struct box*)NULL;
    box3->next=(struct box*)NULL;
    *box1=box3;
    return;
  }
  struct box *tail;
  for( tail=*box1; tail->next; tail=tail->next ) ; // walk to the last element
  box3->pre =tail;
  tail->next=box3;
  box3->next=(struct box*)NULL;
}
// ------------------------ end list-funcs --------------------------
// ------------------ (&~7)-pixmap-functions ------------------------
// Return the mark bits (lowest three bits) of pixel (x,y); 0 = not marked.
// Coordinates outside the pixmap report all three mark bits set.
int marked(pix &p, int x, int y){
  const int inside = ( x>=0 && y>=0 && x<p.x && y<p.y );
  if( !inside ) return 255 & 7;
  return p.p[x+y*p.x] & 7;
}
#define Nfilt3 4 /* number of 3x3 filter */
// 3x3 neighbourhood patterns that pixel() tests when (n_run & 2) is set.
// Entries are stored row by row, matching the order in which pixel() fills c33.
char filt3[Nfilt3][9]={ /* 2=ignore_pixel, 0=white_background, 1=black_pixel */
{0,0,0, 0,0,1, 1,0,0}, /* (-1,-1) (0,-1) (1,-1) (-1,0) (0,0) ... */
{0,0,0, 1,0,1, 0,0,0},
{1,0,0, 0,0,1, 0,0,0},
{1,1,0, 0,1,0, 2,1,1}
};
char c33[9]; // scratch buffer: pixel() stores bit 7 of the 3x3 neighbourhood here
// test if pixel --- later with error-correction
// Returns the value stored at (x,y) with the three mark bits cleared;
// coordinates outside the pixmap give 255 & ~7.
// n_run selects an optional filter:
//  - (n_run & 2): bit 7 of every pixel in the 3x3 neighbourhood is gathered in
//    c33 (0 for neighbours outside the pixmap) and compared with the filt3
//    patterns; on a match the result is env.cs or 0, depending on the centre
//    entry of the matching pattern.
//  - (n_run & 1): only reached when (n_run & 2) is clear and the pixel is not
//    on the border; two diagonal patterns replace the value by 64.
int pixel(pix &p, int x, int y){ // ret: pixel-color (without marks)
if( x<0 || y<0 || x>=p.x || y>=p.y ) return 255 & ~7;
#if 0
// disabled alternative: weighted average of the pixel and its 4 neighbours
if((n_run&2) && x>0 && y>0 && x+1<p.x && y+1<p.y){ int r; // filter
r=(12*p.p[x+y*p.x]+p.p[x+1 +y*p.x]+p.p[x-1 +y*p.x]
+p.p[x+(y+1)*p.x]+p.p[x+(y-1)*p.x])/16;
return r & ~7;
}
#else
int i;
if(n_run&2){ // filter
c33[0]=c33[1]=c33[2]=c33[3]=c33[4]=c33[5]=c33[6]=c33[7]=c33[8]=0;
if(x>0 && y>0 ) c33[0]=p.p[x-1+(y-1)*p.x]>>7;
if( y>0 ) c33[1]=p.p[x +(y-1)*p.x]>>7;
if(x+1<p.x && y>0 ) c33[2]=p.p[x+1+(y-1)*p.x]>>7;
if(x>0 ) c33[3]=p.p[x-1+(y )*p.x]>>7;
c33[4]=p.p[x +(y )*p.x]>>7;
if(x+1<p.x ) c33[5]=p.p[x+1+(y )*p.x]>>7;
if(x>0 && y+1<p.y ) c33[6]=p.p[x-1+(y+1)*p.x]>>7;
if( y+1<p.y ) c33[7]=p.p[x +(y+1)*p.x]>>7;
if(x+1<p.x && y+1<p.y ) c33[8]=p.p[x+1+(y+1)*p.x]>>7;
// a pattern matches when every entry is either "ignore" (2) or differs from
// the gathered bit (c33 holds bit 7, the pattern uses 1 for black)
for(i=0;i<Nfilt3;i++)
if( ( (filt3[i][0]>>1) || c33[0]!=(1 & filt3[i][0]) )
&& ( (filt3[i][1]>>1) || c33[1]!=(1 & filt3[i][1]) )
&& ( (filt3[i][2]>>1) || c33[2]!=(1 & filt3[i][2]) )
&& ( (filt3[i][3]>>1) || c33[3]!=(1 & filt3[i][3]) )
&& ( (filt3[i][4]>>1) || c33[4]!=(1 & filt3[i][4]) )
&& ( (filt3[i][5]>>1) || c33[5]!=(1 & filt3[i][5]) )
&& ( (filt3[i][6]>>1) || c33[6]!=(1 & filt3[i][6]) )
&& ( (filt3[i][7]>>1) || c33[7]!=(1 & filt3[i][7]) )
&& ( (filt3[i][8]>>1) || c33[8]!=(1 & filt3[i][8]) ) )
return ((filt3[i][4])?env.cs:0);
// no pattern matched: plain value, and the (n_run & 1) filter below is skipped
return p.p[x +(y )*p.x] & ~7;
}
#endif
if((n_run&1) && x>0 && y>0 && x+1<p.x && y+1<p.y){ int r; // filter
r=p.p[x+y*p.x]&~7;
/* {2,2,2, 2,0,1, 2,1,0} */
if((r&128) && (~p.p[x+1 +y *p.x]&128)
&& (~p.p[x +(y+1)*p.x]&128)
&& ( p.p[x+1+(y+1)*p.x]&128)) r=64; // faxfilter
else
/* {2,2,2, 1,0,2, 0,1,2} */
if((r&128) && (~p.p[x-1 +y *p.x]&128)
&& (~p.p[x +(y+1)*p.x]&128)
&& ( p.p[x-1+(y+1)*p.x]&128)) r=64; // faxfilter
return r & ~7;
}
return( p.p[x+y*p.x] & ~7 ); }
/* Modify pixel (x,y): keep the bits selected by ia, then set the bits in io.
   Coordinates outside the pixmap are silently ignored. */
void put(pix &p,int x,int y,int ia,int io){
  if( x<0 || x>=p.x || y<0 || y>=p.y ) return; // out of range
  const int idx=x+y*p.x;
  p.p[idx]=(p.p[idx] & ia) | io;
}
// modify n_run and print out what would happen on 2nd, 3th loop!
// Prints an ASCII rendering of the dx*dy region of b starting at (x0,y0).
// The region is sampled in cells of tx*ty pixels so that large regions are
// reduced. A cell prints '@' if a pixel below cs is found with the current
// n_run. If n_run is 0, n_run is temporarily set to 1, 2 and 3 and the cell
// prints '1', '2' or '3' for the first of those settings under which pixel()
// reports a value below cs; otherwise '.' is printed. n_run is reset to 0.
void out_b(pix b, int x0, int y0, int dx, int dy, int cs ){
int x,y,x2,y2,tx,ty,n1,n2;
static char *c1=".,,,,,,;@1234xoO"; // output character, indexed by n1
tx=dx/80+1;ty=dy/40+1; // step, usually 1, but greater on large maps
printf("# list pattern x=%4d %4d d=%3d %3d t=%d %d\n",x0,y0,dx,dy,tx,ty);
for(y=y0;y<y0+dy;y+=ty) { // reduce the output to max 78x40
for(x=x0;x<x0+dx;x+=tx){ n1=n2=0;
// scan the cell until the first hit (n1!=0) or the cell/region border
for(y2=y;y2<y+ty && y2<y0+dy && n1==0;y2++) /* Mai2000 */
for(x2=x;x2<x+tx && x2<x0+dx && n1==0;x2++)
#if 0
if((pixel(b,x2,y2)<cs)){ n1=8+marked(b,x2,y2); }
#else
{
if((pixel(b,x2,y2)<cs)){ n1=8; }
if(n_run==0){
n_run++; if(!n1) if((pixel(b,x2,y2)<cs)){ n1= 9; }
n_run++; if(!n1) if((pixel(b,x2,y2)<cs)){ n1=10; }
n_run++; if(!n1) if((pixel(b,x2,y2)<cs)){ n1=11; }
n_run=0;
}
}
#endif
printf("%c", c1[n1] );
}
if ( dx>0 ) printf("\n");
}
}
// Print a one-line summary of box px (dots, character, line number and the
// m1..m4 values relative to the top of the box), followed by its pixel
// pattern via out_b.
void out_x(box *px){
  const int left=px->x0, top=px->y0;
  printf("# list box dots=%d c=%c line=%d m= %d %d %d %d\n",
         px->dots, px->c, px->line,
         px->m1-top, px->m2-top,
         px->m3-top, px->m4-top);
  const int width =px->x1-left+1;
  const int height=px->y1-top +1;
  out_b(*(env.p), left, top, width, height, env.cs);
}
// Print the pixel patterns of box1 and box2 side by side (debug output).
// Both patterns use the sampling step derived from box1. Dark pixels
// (value < env.cs) are drawn with c1, the others with c2, each indexed by the
// pixel's mark bits. Rows of box2 beyond its lower border are left empty.
void out_x2(box *box1,box *box2){
  int x,y,i,tx,ty;
  static const char *c1="OXXXXxx@",*c2=".,,,,,,,";
  pix *b=env.p;
  tx=(box1->x1-box1->x0)/40+1;
  ty=(box1->y1-box1->y0)/40+1; // step, usually 1, but greater on large maps
  printf("\n# list 2 patterns");
  for(i=0;i<=box1->y1-box1->y0;i+=ty) { // reduce the output to max 78x40???
    printf("\n"); y=box1->y0+i;
    for(x=box1->x0;x<=box1->x1;x+=tx)
      printf("%c", ((pixel(*b,x,y)<env.cs)?c1[marked(*b,x,y)]
                                          :c2[marked(*b,x,y)]));
    printf(" "); y=box2->y0+i;
    if(y<=box2->y1)
      // x1 is inclusive, as for box1 above (the old "<" dropped the last column)
      for(x=box2->x0;x<=box2->x1;x+=tx)
        printf("%c", ((pixel(*b,x,y)<env.cs)?c1[marked(*b,x,y)]
                                            :c2[marked(*b,x,y)]));
  }
}
// Print two dx*dy regions of b side by side, separated by " = ": the first
// starts at (x0,y0), the second at (x1,y1). Dark pixels (value < cs) are
// drawn with c1, the others with c2, each indexed by the pixel's mark bits.
void out_b2(pix &b, int x0, int y0, int dx, int dy, int cs, int x1, int y1 ){
  static char *c1="OXXXXxx@",
              *c2=".,,,,,,,";
  int row,col;
  printf(" x=%5d %5d d=%5d %5d\n",x0,y0,dx,dy);
  for(row=0;row<dy;row++) {
    const int ya=y0+row; // row in the first region
    const int yb=y1+row; // matching row in the second region
    for(col=0;col<dx;col++){
      const int xa=x0+col;
      if( pixel(b,xa,ya)<cs ) printf("%c", c1[marked(b,xa,ya)]);
      else                    printf("%c", c2[marked(b,xa,ya)]);
    }
    if( dx>0 ) printf(" = ");
    for(col=0;col<dx;col++){
      const int xb=x1+col;
      if( pixel(b,xb,yb)<cs ) printf("%c", c1[marked(b,xb,yb)]);
      else                    printf("%c", c2[marked(b,xb,yb)]);
    }
    if( dx>0 ) printf("\n");
  }
}
// ------------------------ feature extraction -----------------
// -------------------------------------------------------------
// detect maxima of line overlaps (returned in %) and line coordinates
// direction codes
#define HOR 1 // horizontal
#define VER 2 // vertical
#define RIS 3 // rising
#define FAL 4 // falling
struct tline line; // NOTE(review): not referenced in the visible part of this file
// Exchange the two integers that a and b point to.
void swap(int *a,int *b){
  const int tmp=*b;
  *b=*a;
  *a=tmp;
}
// calculate the overlap of the line (x0,y0)-(x1,y1) with black points
// (original note: "by recursive bisection"; the code below steps along the
// line incrementally)
// line y=dy/dx*x+b, implicit form d=F(x,y)=dy*x-dx*y+b*dx=0
// incremental y(i+1)=m*(x(i)+1)+b, F(x+1,y+1)=f(F(x,y))
// ret & 1 => inverse pixel!
// d=2*F(x,y) integers
// Returns r0*(ret&~1)/(r0+r1), where r0 counts the visited pixels that are
// below cs (or not below cs if ret&1 is set) and r1 counts the others.
int get_line(int x0, int y0, int x1, int y1, pix p, int cs, int ret){
int dx,dy,incrE,incrNE,d,x,y,r0,r1,ty,tx,
*px,*py,*pdx,*pdy,*ptx,*pty,*px1;
dx=abs(x1-x0); tx=((x1>x0)?1:-1); // tx=x-mirroring (new)
dy=abs(y1-y0); ty=((y1>y0)?1:-1); // ty=y-mirroring (new)
// rotate coordinate system if dy>dx
// (the p* pointers select which of x/y is the axis that advances every step)
if(dx>dy){ pdx=&dx;pdy=&dy;px=&x;py=&y;ptx=&tx;pty=&ty;px1=&x1; }
else { pdx=&dy;pdy=&dx;px=&y;py=&x;ptx=&ty;pty=&tx;px1=&y1; }
// make the stepping axis run in increasing direction
if( *ptx<0 ){ swap(&x0,&x1);swap(&y0,&y1);tx=-tx;ty=-ty; }
d=(*pdy)*2-(*pdx); incrE=(*pdy)*2; incrNE=((*pdy)-(*pdx))*2;
x=x0; y=y0; r0=r1=0; /* dd=tolerance (store max drift) */
while( (*px)<=(*px1) ){
if( ((pixel(p,x,y)<cs)?1:0)^(ret&1) ) r0++; else r1++;
(*px)++; if( d<=0 ){ d+=incrE; } else { d+=incrNE; (*py)+=(*pty); }
}
return (r0*(ret&~1))/(r0+r1); // ret==100 => percentage %
}
// Variant of get_line with a tolerance of one pixel perpendicular to the line:
// a missed pixel still counts as long as, since the last direct hit, all
// pixels on one side of the line (offset +(ry,rx) or -(ry,rx)) were hits.
// ret & 1 => inverse pixel!
// d=2*F(x,y) integer numbers, ideal line: ,I pixel: I@
// ..@ @@@ .@. ...,@2@. +1..+3 floodfill around line ???
// ..@ .@@ .@. ...,.@@@ +2..+4 <= thats not implemented yet
// ..@ ..@ .@. ...,.@@@ +2..+4
// @.@ @.. .@. ...,@@@. +1..+3
// @.@ @@. .@. ...I@@@. 0..+3
// @@@ @@@ .@. ..@1@@.. 0..+2
// 90% 0% 100% 90% r1-r2
// Returns r0*(ret&~1)/(r0+r1), with r0 = accepted steps and r1 = rejected steps.
int get_line2(int x0, int y0, int x1, int y1, pix p, int cs, int ret){
int dx,dy,incrE,incrNE,d,x,y,r0,r1,ty,tx,q,ddy,rx,ry,
*px,*py,*pdx,*pdy,*ptx,*pty,*px1;
dx=abs(x1-x0); tx=((x1>x0)?1:-1); // tx=x-mirroring (new)
dy=abs(y1-y0); ty=((y1>y0)?1:-1); // ty=y-mirroring (new)
// rotate coordinate system if dy>dx
// (rx,ry) flags which axis advances every step
if(dx>dy){ pdx=&dx;pdy=&dy;px=&x;py=&y;ptx=&tx;pty=&ty;px1=&x1;rx=1;ry=0; }
else { pdx=&dy;pdy=&dx;px=&y;py=&x;ptx=&ty;pty=&tx;px1=&y1;rx=0;ry=1; }
if( *ptx<0 ){ swap(&x0,&x1);swap(&y0,&y1);tx=-tx;ty=-ty; }
d=(*pdy)*2-(*pdx); incrE=(*pdy)*2; incrNE=((*pdy)-(*pdx))*2;
x=x0; y=y0; r0=r1=0; ddy=3; // tolerance = bit 1 + bit 0 = left+right
while( (*px)<=(*px1) ){
q=((pixel(p,x,y)<cs)?1:0)^(ret&1);
if ( !q ){ // tolerance one pixel perpendicular to the line
// what about 2 or more pixels tolerance???
// clear a tolerance bit once the neighbour on that side is a miss too
ddy&=(~1)|(((pixel(p,x+ry,y+rx)<cs)?1:0)^(ret&1));
ddy&=(~2)|(((pixel(p,x-ry,y-rx)<cs)?1:0)^(ret&1))*2;
} else ddy=3;
if( ddy ) r0++; else r1++;
(*px)++; if( d<=0 ){ d+=incrE; } else { d+=incrNE; (*py)+=(*pty); }
}
return (r0*(ret&~1))/(r0+r1); // ret==100 => percentage %
}
// Scan the rectangle x0..x1, y0..y1 (clipped to the pixmap) for black and
// white dots. Bit 0 of the result means a pixel below cs was seen, bit 1 a
// pixel at or above cs (3 = white+black). Only the bits requested in mask are
// returned, and the scan stops as soon as all of them have been seen.
char get_bw(int x0, int x1, int y0, int y1,
            pix p, int cs,int mask){
  char seen=0; // later with error < 2% (1 dot)
  if( x0<0 )    x0=0;
  if( x1>=p.x ) x1=p.x-1;
  if( y0<0 )    y0=0;
  if( y1>=p.y ) y1=p.y-1;
  for(int y=y0;y<=y1;y++){
    for(int x=x0;x<=x1;x++){
      seen |= ( pixel(p,x,y)<cs ) ? 1 : 2;
      if( (seen & mask)==mask ) return mask; // everything requested was found
    }
  }
  return(seen & mask);
}
// Like get_bw, but every sampled row is shifted by x*lines.dy/p.x.
// Bit 0 of the result means a pixel below cs was seen, bit 1 a pixel at or
// above cs (3 = white+black); only the bits requested in mask are returned.
char get_bw2(int x0, int x1, int y0, int y1,
             pix p, int cs,int mask){
  char found=0; // later with error < 2% (1 dot)
  int x,y;
  if( x0<0 )    x0=0;
  if( x1>=p.x ) x1=p.x-1;
  if( y0<0 )    y0=0;
  if( y1>=p.y ) y1=p.y-1;
  for(y=y0;y<=y1;y++){
    for(x=x0;x<=x1;x++){
      if( pixel(p,x,y+x*lines.dy/p.x)<cs ) found|=1;
      else                                 found|=2;
      if( (found & mask)==mask ) return mask; // everything requested was found
    }
  }
  return(found & mask);
}
// Count the black part of the line (x0,y0)-(x1,y1): every pixel below cs
// also makes the following samples count, w samples in total. A line of
// length zero returns 0.
int get_line3(int x0, int x1, int y0, int y1, pix p,int cs,int w){
  const int dx=x1-x0, dy=y1-y0;
  const int d=( abs(dx)>=abs(dy) ) ? abs(dx) : abs(dy); // number of steps
  if( d==0 ) return 0;
  int count=0;
  int remaining=0; // samples still counted after the last black pixel
  for(int i=0;i<=d;i++){
    const int x=x0+i*dx/d;
    const int y=y0+i*dy/d;
    if( pixel(p,x,y)<cs ) remaining=w; // 0=white 1=black
    if( remaining ){ remaining--; count++; }
  }
  return count;
}
// more general Mar2000 (x0,x1,y0,y1 instead of x0,y0,x1,y1! (history))
// Follow the line (x0,y0)-(x1,y1) and count how often it enters black,
// i.e. the number of [white]-black transitions. A black start pixel counts.
int num_cross(int x0, int x1, int y0, int y1, pix p, int cs){
  const int dx=x1-x0, dy=y1-y0;
  const int d=( abs(dx)>=abs(dy) ) ? abs(dx) : abs(dy); // number of steps
  int crossings=0;
  int prev=0; // colour of the previous sample, 0=white 1=black
  for(int i=0;i<=d;i++){
    int x=x0, y=y0;
    if( d ){ x=x0+i*dx/d; y=y0+i*dy/d; }
    const int cur=( pixel(p,x,y)<cs ) ? 1 : 0;
    if( !prev && cur ) crossings++;
    prev=cur;
  }
  return crossings;
}
// -------------------------------------------------------------
// mark edge-points
// - first move forward until b/w-edge
// - more than 2 pixel?
// - loop around
// - if forward pixel : go up, rotate right
// - if forward no pixel : rotate left
// - stop if found first 2 pixel in same order
// uses the follow-the-right-hand-wall strategy
// --------------------------------------------------------------
// turmite game: inp: start-x,y, regel r_black=UP,r_white=RIght until border
// out: last-position
// count along the way: steps, dead ends, xmax, ymax, upper-right, lower-right, upper-left and lower-left corners
//
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// GOOD IDEA OF MINE!!! AI method!!! or not???
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Move the position (x,y) through the box x0..x1, y0..y1 of p.
// On a pixel below cs the rule rb is applied, otherwise rw; a rule is one of
// UP, DO, RI, LE (one step in that direction) or ST (stop). The walk ends
// when the position leaves the box or a ST rule is hit, and the final
// position is handed back through x and y. Nothing happens if the box is not
// completely inside the pixmap.
void turmite(pix &p, int &x, int &y,
             int x0, int x1, int y0, int y1, int cs, int rw, int rb){
  if( x0<0 || y0<0 || x1>=p.x || y1>=p.y ) return; // out of pixmap
  while( x>=x0 && y>=y0 && x<=x1 && y<=y1 ){ // still inside the box
    const int r = ( (pixel(p,x,y)<cs) ? rb : rw ); // select rule
    if( r==UP ){ y--; continue; }
    if( r==DO ){ y++; continue; }
    if( r==RI ){ x++; continue; }
    if( r==LE ){ x--; continue; }
    if( r==ST ) break;
    assert(0); // unknown rule
  }
}
// search a way from p0 to p1 without crossing pixels of type t
// only two directions, useful to test if there is a gap 's'
// labyrinth algo - do you know a faster way?
// The walk starts at (x0,y0) heading (dx,dy)=(1,0) and stays on pixels of the
// same colour class (below cs or not) as the start pixel, inside the rectangle
// spanned by the two points.
// Returns 1 if (x1,y1) is reached, 0 if the walk is back at the start with
// its initial heading.
int joined(pix &p, int x0, int y0, int x1, int y1, int cs){
int t,r,x,y,dx,dy,xa,ya,xb,yb;
x=x0;y=y0;dx=1;dy=0;
// xa..xb / ya..yb: bounding rectangle of the two points
if(x1>x0){xa=x0;xb=x1;}else {xb=x0;xa=x1;}
if(y1>y0){ya=y0;yb=y1;}else {yb=y0;ya=y1;}
t=((pixel(p,x,y)<cs)?1:0); // colour class of the start pixel
for(;;){
if( t==((pixel(p,x+dy,y-dx)<cs)?1:0) // right free?
&& x+dy>=xa && x+dy<=xb && y-dx>=ya && y-dx<=yb) // wall
{ r=dy;dy=-dx;dx=r;x+=dx;y+=dy; } // rotate right and step forward
else { r=dx;dx=-dy;dy=r; } // rotate left
// printf(" path xy %d-%d %d-%d %d %d %d %d\n",xa,xb,ya,yb,x,y,dx,dy);
if( x==x1 && y==y1 ) return 1;
if( x==x0 && y==y0 && dx==1) return 0;
}
return 0; // endless loop ?
}
// Walk from (x,y) in direction r for at most l steps and return the number
// of steps taken. The walk stops early when the position leaves the pixmap
// or when the current pixel's colour (1 = below cs, 0 = otherwise) differs
// from col. r indexes the step table rr.
int loop(pix &p,int x,int y,int l,int cs,int col,int r){
  static const int rr[5][2]={{0,0},{0,-1},{0,1},{1,0},{-1,0}};
  int steps=0;
  while( steps<l && x>=0 && y>=0 && x<p.x && y<p.y ){
    const int black=( pixel(p,x,y)<cs ) ? 1 : 0;
    if( black^col ) break; // colour changed
    x+=rr[r][0];
    y+=rr[r][1];
    steps++;
  }
  return steps;
}
// Recursively mark all pixels that are connected with (x,y) and share its
// colour class (below cs or not); returns the number of pixels marked.
// The mark r is stored in the lowest three bits of each pixel. Pixels that
// are outside the pixmap or already marked contribute 0. The four direct
// neighbours are always followed; the four diagonal ones only if (n_run & 1)
// is set (second run, diag-code added Mai99).
// better with neighbours of same color (more general) ???
int mark_nn(pix &p, int x, int y, int &cs, int r){
  // neighbour offsets: the four direct ones first, then the four diagonals
  static const int step[8][2]={ { 1, 0},{-1, 0},{ 0, 1},{ 0,-1},
                                { 1, 1},{-1,-1},{-1, 1},{ 1,-1} };
  if( x<0 || y<0 || x>=p.x || y>=p.y ) return 0; // out of limits
  if( marked(p,x,y) ) return 0;                  // already marked
  p.p[x+y*p.x] |= (r&7);                         // mark (better direction)
  const int colour=( (pixel(p,x,y)<cs) ? 0 : 1 );
  int total=1;
  int k;
  for(k=0;k<4;k++){
    const int nx=x+step[k][0], ny=y+step[k][1];
    if( colour==((pixel(p,nx,ny)<cs)?0:1) ) total+=mark_nn(p,nx,ny,cs,r);
  }
  if(n_run&1){ // second run (use diagonal points)
    // if( colour==1 ) return total; // only one color diagonal
    for(k=4;k<8;k++){
      const int nx=x+step[k][0], ny=y+step[k][1];
      if( colour==((pixel(p,nx,ny)<cs)?0:1) ) total+=mark_nn(p,nx,ny,cs,r);
    }
  }
  return total;
}
// use lowest three bits for mark
// Recursively mark (with r) all pixels connected with (x,y) that share its
// colour class (below cs or not), including diagonal neighbours, and enlarge
// the frame x0..x1, y0..y1 so that it contains every marked pixel.
// The recursion depth is limited: at depth 10000 an error is printed once to
// stderr and that branch is abandoned without marking the pixel.
void frame_nn(pix &p, int x, int y,
int &x0, int &x1, int &y0, int &y1, // enlargeframe
int cs, int r){
static int stackdepth=0,msg=0; // current recursion depth, "error already printed" flag
if( x<0 || y<0 || x>=p.x || y>=p.y) return; // out of limits
if( marked(p,x,y) ) return; // already marked
stackdepth++; // if(stackdepth==1)msg=0;
if(stackdepth==10000) {
if(!msg) fprintf(stderr,"\n ERROR: Stack overflow! Large objects!");
msg=1;stackdepth--;return;
}
p.p[x+y*p.x] |= (r&7); // mark (better direction)
if(x<x0) x0=x; if(x>x1) x1=x; // enlarge frame
if(y<y0) y0=y; if(y>y1) y1=y;
int i=((pixel(p,x ,y )<cs)?0:1); // colour class of this pixel
if( i==((pixel(p,x+1,y )<cs)?0:1) ) frame_nn(p,x+1,y ,x0,x1,y0,y1,cs,r);
if( i==((pixel(p,x-1,y )<cs)?0:1) ) frame_nn(p,x-1,y ,x0,x1,y0,y1,cs,r);
if( i==((pixel(p,x ,y+1)<cs)?0:1) ) frame_nn(p,x ,y+1,x0,x1,y0,y1,cs,r);
if( i==((pixel(p,x ,y-1)<cs)?0:1) ) frame_nn(p,x ,y-1,x0,x1,y0,y1,cs,r);
// diag-code added Mai99
if( i==((pixel(p,x+1,y+1)<cs)?0:1) ) frame_nn(p,x+1,y+1,x0,x1,y0,y1,cs,r);
if( i==((pixel(p,x-1,y-1)<cs)?0:1) ) frame_nn(p,x-1,y-1,x0,x1,y0,y1,cs,r);
if( i==((pixel(p,x-1,y+1)<cs)?0:1) ) frame_nn(p,x-1,y+1,x0,x1,y0,y1,cs,r);
if( i==((pixel(p,x+1,y-1)<cs)?0:1) ) frame_nn(p,x+1,y-1,x0,x1,y0,y1,cs,r);
stackdepth--; return;
}
// Clear the lowest 3 (mark) bits of every pixel in the rectangle
// x0..x1, y0..y1 (borders inclusive). The rectangle is not clipped, so it
// must lie inside the pixmap.
void clr_bits(pix &p, int x0, int x1, int y0, int y1){
  for(int row=y0;row<=y1;row++){
    const int base=row*p.x; // index of the first pixel of this row
    for(int col=x0;col<=x1;col++)
      p.p[col+base] &= ~7;
  }
}
// Copy the dx*dy region of p that starts at (x0,y0) into b.
// b->p must already point to a buffer of at least len bytes. The pixels are
// read through pixel(), so the copy carries no mark bits.
// Returns 0 on success. Returns 1 (after printing a message, with b->x and
// b->y set to 0) if b has no buffer, a size is negative or dx*dy exceeds len.
int copybox( pix p, int x0, int y0, int dx, int dy, pix *b, int len){
  b->y=0;
  b->x=0;
  if( b->p==NULL || dx<0 || dy<0 || dx*dy>len ){
    printf(" error-copybox x=%5d %5d d=%5d %5d\n",x0,y0,dx,dy);
    return 1;
  }
  b->x   = dx;
  b->y   = dy;
  b->bpp = 1;
  for(int col=0;col<dx;col++)
    for(int row=0;row<dy;row++)
      b->p[col+row*dx] = pixel(p,x0+col,y0+row);
  return 0;
}
// look for white holes surrounded by black points
// at moment white point with black in all for directions
// later: count only holes with vol>10% ???
// Works on a copy of the region x0..x1, y0..y1 of p: all white areas that
// touch the border of the copy are marked first; every white area that is
// still unmarked afterwards is a hole. A hole is counted if it has more than
// one pixel or if the whole region has at most 40 pixels.
// Returns the number of holes, or -1 if the region could not be copied
// (e.g. it is larger than MaxBox).
int num_hole(int x0, int x1, int y0, int y1, pix p, int cs){
int rc=0,x,y; // rc:1=hole
static Uchar buf[MaxBox]; // or 2nd copy of picture, for working
pix b; // temporary mini-page
int dx=x1-x0+1, dy=y1-y0+1;
b.p = buf;
// b.p = new Uchar[ dy*dx ]; // buffer
// b.p = (Uchar *)malloc(dy*dx);
// assert(b.p);
if( copybox(p,x0,y0,dx,dy,&b,MaxBox) ) return -1;
// --- mark white-points connected with border // if(opt&1)
for(x=0;x<b.x;x++) if(pixel(b,x, 0)>=cs) mark_nn(b,x, 0,cs,AT); // extension:
for(x=0;x<b.x;x++) if(pixel(b,x,b.y-1)>=cs) mark_nn(b,x,b.y-1,cs,AT); // only optional
for(y=0;y<b.y;y++) if(pixel(b,0 ,y)>=cs) mark_nn(b,0 ,y,cs,AT); // acts like
for(y=0;y<b.y;y++) if(pixel(b,b.x-1,y)>=cs) mark_nn(b,b.x-1,y,cs,AT); // an additional line
// out_b(b,0,0,b.x,b.y,cs);
// --- look for unmarked white points => hole
for(x=0;x<b.x;x++)
for(y=0;y<b.y;y++) if( !marked(b,x,y) ) // unmarked
if( pixel(b,x,y)>=cs ) // hole found
{ if( mark_nn(b,x,y,cs,AT)>1 || dx*dy<=40) rc++; }
// delete[] b.p; // segmentation fault
// free(b.p);
return rc;
}
// Count the black objects in the region x0..x1, y0..y1 of p that are not
// connected with each other --- used for i,auml,ouml,etc.
// Works on a copy of the region. Returns the number of objects, or -1 if the
// region could not be copied (e.g. it is larger than MaxBox).
int num_obj(int x0, int x1, int y0, int y1, pix p, int cs){
  static Uchar buf[MaxBox]; // working copy of the region
  pix b;
  b.p = buf;
  if( copybox(p,x0,y0,x1-x0+1,y1-y0+1,&b,MaxBox) ) return -1;
  int objects=0;
  // --- every unmarked black pixel starts a new object; mark all of it
  for(int x=0;x<b.x;x++){
    for(int y=0;y<b.y;y++){
      if( pixel(b,x,y) >= cs ) continue; // white
      if( marked(b,x,y) )      continue; // belongs to an object already counted
      objects++;
      mark_nn(b,x,y,cs,AT);
    }
  }
  return objects;
}
// ----------------------------------------------------------------------
// filter functions to exclude certain letters
// if elements from A+B are not excluded
// remove all elements A (B) from the group of admissible ones
// Candidate characters for the exclusion filter (ini_list/excude/getresult).
// The umlauts must be written as hex escapes: the former "\0xe4" was the
// octal escape \0 (a NUL byte) followed by the text "xe4", which ended the
// string right after ",." and hid all letters from the filter functions.
// The escapes end their literal so that the following hex-digit letters
// ("abcdef...") cannot be absorbed into them.
char *list="0123456789,.\xe4\xf6\xfc" // a-umlaut=228 o-umlaut=246 u-umlaut=252
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
int wert[100]; // wert[i] = how often list[i] has been excluded (0 = still admissible)
int listlen=0,numrest=0; // number of candidates in list / candidates not yet excluded
// Reset the exclusion filter: clear the counter of every candidate in list
// (at most 100) and set listlen and numrest to the number of candidates.
void ini_list(){
  int n=0;
  while( list[n]!=0 && n<100 ){
    wert[n]=0;
    n++;
  }
  listlen=n;
  numrest=n;
}
// Exclude every character of filt (at most the first 100) from the candidate
// list: its counter in wert is incremented, and numrest is decremented the
// first time a candidate gets excluded.
void excude(char *filt){
  for(int j=0; filt[j]!=0 && j<100; j++){
    const char c=filt[j];
    for(int i=0; list[i]!=0 && i<100; i++){
      if( c!=list[i] ) continue;
      if( !wert[i] ) numrest--; // first exclusion of this candidate
      wert[i]++;
    }
  }
}
// Return the single candidate that has not been excluded, if exactly one is
// left (numrest==1); otherwise return '_'.
char getresult(){
  if( numrest!=1 ) return '_';
  int i=0;
  while( list[i]!=0 && i<100 ){
    if( !wert[i] ) return list[i];
    i++;
  }
  return '_';
}
// look at the environment of the pixel too (contrast etc.)
// detailed analysis only of diff pixels!
//
// 100% * distance, 0 is best fit
// = similarity of 2 characters, for recognition of mutilated characters
// weigth of pixels with only one same neighbour set to 0
// look at contours too!
// Returns 100 at once if the box sizes differ by more than 1+size/16.
// Otherwise every pixel pair that agrees adds 16 to rgood; for a pair that
// differs, the 8 neighbours (at a distance scaled with the box size) are
// compared as well and 16*(number of differing neighbours - 1) is added to
// rbad. A mismatch in the position relative to the m1..m4 values of the box
// adds 128 to rbad. The result is 100*rbad/(rgood+rbad), or 99 if both are 0.
int distance( pix p1, box *box1,
pix p2, box *box2, int cs){
int rc=0,x,y,v1,v2,i1,i2,rgood=0,rbad=0,x1,y1,x2,y2,dx,dy,dx1,dy1,dx2,dy2;
x1=box1->x0;y1=box1->y0;x2=box2->x0;y2=box2->y0;
// dx,dy = the larger of the two box sizes
dx1=box1->x1-box1->x0+1; dx2=box2->x1-box2->x0+1; dx=((dx1>dx2)?dx1:dx2);
dy1=box1->y1-box1->y0+1; dy2=box2->y1-box2->y0+1; dy=((dy1>dy2)?dy1:dy2);
if(abs(dx1-dx2)>1+dx/16 || abs(dy1-dy2)>1+dy/16) return 100;
// compare relations to baseline and upper line
if(2*box1->y1>box1->m3+box1->m4 && 2*box2->y1<box2->m3+box2->m4) rbad+=128;
if(2*box1->y0>box1->m1+box1->m2 && 2*box2->y0<box2->m1+box2->m2) rbad+=128;
// compare pixels
for( y=0;y<dy;y++ )
for( x=0;x<dx;x++ ) { // try global shift too ???
v1 =((pixel(p1,x1+x ,y1+y )<cs)?1:0); i1=8; // better gray?
v2 =((pixel(p2,x2+x ,y2+y )<cs)?1:0); i2=8; // better gray?
if(v1==v2) { rgood+=16; continue; } // all things are right!
// what about different pixel???
// test overlapp of surounding pixels ???
v1=-1; // from here on v1 counts differing neighbours, minus one
for(i1=-1;i1<2;i1++)
for(i2=-1;i2<2;i2++)if(i1!=0 || i2!=0){
if( ((pixel(p1,x1+x+i1*(1+dx/32),y1+y+i2*(1+dy/32))<cs)?1:0)
!=((pixel(p2,x2+x+i1*(1+dx/32),y2+y+i2*(1+dy/32))<cs)?1:0) ) v1++;
}
if(v1>0)rbad+=16*v1;
}
if(rgood+rbad) rc= 100*rbad/(rgood+rbad); else rc=99;
// if(rc<10 && vvv){
// printf(" distance rc=%d\n",rc);
// out_x(box1);out_x(box2);
// }
return rc;
}
// new variant
// look at the environment of the pixel too (contrast etc.)
// detailed analysis only of diff pixels!
//
// 100% * distance, 0 is best fit
// = similarity of 2 chars for recognition of noisy chars
// weigth of pixels with only one same neighbour set to 0
// look at contours too!
// Differences to distance(): the comparison grid has the size of box1 and is
// mapped onto box2 by scaling; a size mismatch only adds 1 to rbad; the grid
// is sampled with a raster step (tx,ty) on large boxes; the m1..m4 relations
// are compared only if m4>0 in both boxes. If (vvv & 1024) is set, the result
// and both patterns are printed.
// Returns 100*rbad/(rgood+rbad), or 99 if both counters are 0.
int distance2( pix p1, box *box1,
pix p2, box *box2, int cs){
int rc=0,x,y,v1,v2,i1,i2,rgood=0,rbad=0,
x1,y1,x2,y2,dx,dy,dx1,dy1,dx2,dy2,tx,ty;
x1=box1->x0;y1=box1->y0;x2=box2->x0;y2=box2->y0;
// the maximum computed first is overwritten right away: dx=dx1, dy=dy1
dx1=box1->x1-box1->x0+1; dx2=box2->x1-box2->x0+1; dx=((dx1>dx2)?dx1:dx2);dx=dx1;
dy1=box1->y1-box1->y0+1; dy2=box2->y1-box2->y0+1; dy=((dy1>dy2)?dy1:dy2);dy=dy1;
if(abs(dx1-dx2)>1+dx/16 || abs(dy1-dy2)>1+dy/16) rbad++; // how to weight?
// compare relations to baseline and upper line
if(box1->m4>0 && box2->m4>0){ // used ???
if(2*box1->y1>box1->m3+box1->m4 && 2*box2->y1<box2->m3+box2->m4) rbad+=128;
if(2*box1->y0>box1->m1+box1->m2 && 2*box2->y0<box2->m1+box2->m2) rbad+=128;
}
tx=dx/16; if(dx<17)tx=1; // raster
ty=dy/32; if(dy<33)ty=1;
// compare pixels
for( y=0;y<dy;y+=ty )
for( x=0;x<dx;x+=tx ) { // try global shift too ???
v1=((pixel(p1,x1+x*dx1/dx,y1+y*dy1/dy)<cs)?1:0); i1=8; // better gray?
v2=((pixel(p2,x2+x*dx2/dx,y2+y*dy2/dy)<cs)?1:0); i2=8; // better gray?
if(v1==v2) { rgood+=16; continue; } // all things are right!
// what about different pixel???
// test overlapp of surounding pixels ???
v1=1;
v1=-1; // from here on v1 counts differing neighbours, minus one
for(i1=-1;i1<2;i1++)
for(i2=-1;i2<2;i2++)if(i1!=0 || i2!=0){
if( ((pixel(p1,x1+x*dx1/dx+i1*(1+dx1/32),y1+y*dy1/dy+i2*(1+dy1/32))<cs)?1:0)
!=((pixel(p2,x2+x*dx2/dx+i1*(1+dx2/32),y2+y*dy2/dy+i2*(1+dy2/32))<cs)?1:0) ) v1++;
}
if(v1>0)
rbad+=16*v1;
}
if(rgood+rbad) rc= 100*rbad/(rgood+rbad); else rc=99;
if(/* rc<10 && */ vvv&1024){
// NOTE: this #define stays in effect for the rest of the file
#define DEBUG 2
#if DEBUG == 2
printf(" distance2 rc=%d rgood=%d rbad=%d\n",rc,rgood,rbad);
out_b(p1,box1->x0,box1->y0,box1->x1-box1->x0+1,
box1->y1-box1->y0+1,cs);
out_b(p2,box2->x0,box2->y0,box2->x1-box2->x0+1,
box2->y1-box2->y0+1,cs);
// out_x(box1);
// out_x(box2);
#endif
}
return rc;
}
// ============================= call OCR engine ================== ;)
// Recognize the character in box1.
//  1. shrink the frame to the rows/columns that contain black pixels,
//  2. detect dots above the character (i, j, umlauts) and cut them off,
//  3. store the reduced frame back into box1 (x0,y0,x1,y1; dots may change),
//  4. copy the character into a private buffer, blank everything that is not
//     connected with the start point (xa,ya), and
//  5. ask the engines ocr1 and ocr0, retrying with other n_run filter settings
//     as long as the answer is '_'.
// Returns the recognized character, '_' if nothing was recognized (or the
// copy failed) and '@' if the box is larger than MaxBox. With two dots
// detected, a/A/o/O/u/U are mapped to the corresponding umlaut codes.
char whatletter(struct box *box1, int cs){
  pix p=*(box1->p);
  int x,y,dots,xa,ya,x0,x1,y0,y1;
  // size; assigned below once the frame is known (the former initialiser
  // read x0,x1,y0,y1 before they had a value)
  int dx,dy;
  char bc='_'; // best letter
  char um=' '; // umlaut? '"
  xa=box1->x; ya=box1->y;
  x0=box1->x0;y0=box1->y0;
  x1=box1->x1;y1=box1->y1;
  // int vol=(y1-y0+1)*(x1-x0+1); // volume
  // crossed l-m , divided chars
  while( get_bw(x0,x1,y0,y0,p,cs,1)!=1 && y0+1<y1) y0++;
  while( get_bw(x0,x1,y1,y1,p,cs,1)!=1 && y0+1<y1) y1--;
  dx=x1-x0+1;
  dy=y1-y0+1; // size
  // better to proof the white frame too!!! ????
  // --- test for german umlaut and points above, not robust enough???
  // if three chars are connected i-dots (ari) sometimes were not detected
  // - therefore after division a test could be usefull
#if 1
  dots=box1->dots;
  if( dots==0 )
  if( dy>4 && 2*y0<box1->m1+box1->m2 )
  { // tall box ij"a"o"u
    for(y=y0;2*y<y0+y1;y++) if( get_bw(x0,x1,y,y,p,cs,1)==0 ) break; // gap
    if( 2*y<y0+y1 )
    if( get_bw(x0,x0,y0,y,p,cs,1)==0 ) // be sure there are gap to neighbours
    if( get_bw(x1,x1,y0,y,p,cs,1)==0 )
    {
      box1->dots=dots=1;um='\'';
      if( dx>2 && num_obj(x0,x1,y0,y,p,cs)>=2 ){
        box1->dots=dots=2;um='\"'; // may be the following lines are not quite ok
        while( get_bw(x0,x1,y,y,p,cs,1)==0 && y+1<y1) y++; y0=y;
      }
    }
  }
#endif
  dots=box1->dots; // that does not work for divided letters! ??? change it!
  if(dots) // proof for dots, if not => remove
  {
    // out_x(box1);
    for(y=y0;2*y<y0+y1;y++) if( get_bw(x0,x1,y,y,p,cs,1)==0 ) break; // gap
    if( 2*y>=y0+y1 ){ // not found
      box1->dots=dots=0;
    }
  }
  if( dots>=2 )
  { // tall box ij"a"o"u
    for(y=y0;2*y<y0+y1;y++) if( get_bw(x0,x1,y,y,p,cs,1)==0 ) break; // gap
    if( 2*y<y0+y1 ){
      um='\"'; // may be the following lines are not quite ok
      while( get_bw(x0,x1,y,y,p,cs,1)==0 && y+1<y1) y++; y0=y;
    }
  }
  // move upper and lower border (for divided letters)
  while( get_bw(x0,x1,y0,y0,p,cs,1)==0 && y0+1<y1) y0++;
  while( get_bw(x0,x1,y1,y1,p,cs,1)==0 && y0+1<y1) y1--;
  while( get_bw(x0,x0,y0,y1,p,cs,1)==0 && x0+1<x1) x0++;
  while( get_bw(x1,x1,y0,y1,p,cs,1)==0 && x0+1<x1) x1--;
  dx=x1-x0+1;
  dy=y1-y0+1; // size
  box1->x0=x0;box1->y0=y0; // set reduced frame
  box1->x1=x1;box1->y1=y1;
  if( pixel(p,xa,ya)>=cs || 2*ya<y0+y1){ // bad startpoint (from divide)
    for(y=y1;y>=y0;y--) // low to high (not i-dot)
      for(x=x0;x<=x1;x++)
        if(pixel(p,x,y)<cs){ xa=x;ya=y;y=-1;break; } // y=-1 ends the outer loop too
  }
  // ----- create char-only-box -------------------------------------
  if(dx*dy>MaxBox) return '@';
  static Uchar buf[MaxBox]; // or 2nd copy of picture, for working
  pix b;
  b.p = buf;
  if( copybox(p,x0,y0,dx,dy,&b,MaxBox) ) return bc;
  // ------ use diagonal too (only 2nd run?)
  n_run++; mark_nn(b,xa-x0,ya-y0,cs,1); n_run--;
  // blank every pixel that is not connected with the start point
  for(x=0;x<b.x;x++)
    for(y=0;y<b.y;y++) if(!marked(b,x,y)) b.p[x+y*b.x] = 255&~7;
  bc =ocr1(box1,b,cs,dots);
  if(bc=='_'){n_run+=1;bc=ocr1(box1,b,cs,dots);n_run-=1;}
  if(bc=='_'){ bc=ocr0(box1,b,cs,dots); }
  if(bc=='_'){n_run+=1;bc=ocr0(box1,b,cs,dots);n_run-=1;}
  if(bc=='_'){n_run+=2;bc=ocr0(box1,b,cs,dots);n_run-=2;}
  if(bc=='_'){n_run+=3;bc=ocr0(box1,b,cs,dots);n_run-=3;}
  if( um=='\"' ){
    if( bc=='a' ) bc=(char)228;
    if( bc=='A' ) bc=(char)196;
    if( bc=='o' ) bc=(char)246;
    if( bc=='O' ) bc=(char)214;
    if( bc=='u' ) bc=(char)252;
    if( bc=='U' ) bc=(char)220;
  }
  // box1->c=bc; out_x(box1); // test
  return bc;
}
// ----- detect lines (old version) ---------------
/* suggestion: FT and max. of Amplitude(frequenz) as line-frequence
option: range for line numbers 1..1000 or similar
todo: look for thickest line, and divide if thickness=2*mean_thickness
*/
// Scans the rows y0..y0+dy-1 and appends the text lines found to the global
// table lines (m1..m4, x0, x1 per line), starting at index lines.num.
// For every row the black part j of a scan line over the left half of the
// region (sloped by lines.dy/2) is measured with get_line3.
//  - stat==0 (between lines): a row with j>w starts a new line at y.
//  - stat==1 (inside a line): j2 tracks the maximum of j, m3 follows y while
//    the row has more than half of that maximum; the line ends at a row with
//    j==0 that is far enough below m3, or at the last row. Then m2 and m4 are
//    set, and the line is kept only if it is more than 7 rows high.
// Always returns 0. The parameter r has no effect on the result.
// NOTE(review): if lines.num already equals MAXlines on entry, index i is
// used before the MAXlines check is reached - confirm against the size of
// the arrays in struct tlines.
int detect_lines1(pix p,int x0,int y0,int dx,int dy,int r){
int i,j,j2,j3,w,stat=0,y,y2,cs=env.cs;
i=lines.num; if(~r)w=16;
// better function for scanning line arround a letter ???
// or define lines around known chars "eaTmM"
w=16; // width of characters
for(j2=y=y0;y<y0+dy;y++){
j=get_line3(x0,x0+dx/2,y,y+lines.dy/2,p,cs,w);
if(stat==1){ // line if stat=1
if( j>j2){j2=j;} /* max. black part per row */
if(2*j>j2)lines.m3[i]=y;
if((j==0 && (y-lines.m3[i])>1+(y-lines.m1[i])/8) || y+1==y0+dy){
j3=lines.m3[i]-lines.m1[i];
for(y2=lines.m1[i]+j3/8;y2<lines.m1[i]+j3/2;y2++){
j=get_line3(x0,x0+dx/2,y2,y2+lines.dy/2,p,cs,w);
lines.m2[i]=y2; if(17*j>=16*j2) break; // not the best
}
stat=0;j3=lines.m1[i];lines.m4[i]=y;j2=lines.m4[i]-lines.m1[i];
if(vvv&16)printf(" %2d %2d %2d ",
lines.m2[i]-j3,lines.m3[i]-j3,lines.m4[i]-j3); /* end of line */
if(i< MAXlines && j2>7)i++;
if(i>=MAXlines){ printf("Warning: lines>MAXlines\n");break; }
j2=0;
}
} else { // empty space between lines
if(j>w){ // noise ???
stat=1;lines.m4[i]=lines.m3[i]=lines.m2[i]=lines.m1[i]=y;j2=j;
lines.x0[i]=x0;lines.x1[i]=x0+dx-1; // ???
if(vvv&16)printf("\n line= %3d m= %4d",i,y); /* start of line */
}
}
}
lines.num=i;
if(vvv)printf(" - lines= %d",lines.num);
return 0;
}
// ----- detect lines via recursive division (new version) ---------------
// Splits the region (x0,y0,dx,dy) recursively at its widest empty gap and
// hands every undivided box to detect_lines1; r is the recursion depth.
//  - r>1000: returns -1.
//  - r<8: the widest run of empty rows (y2 = its last row, y3 = its height)
//    and the widest run of empty columns (x2, x3) are searched. The region is
//    split at x2 if x3>2*y3 and dy>5*x3, otherwise at y2 if a row gap exists
//    and dx>5*y3; both parts are processed recursively.
//  - otherwise the box is shrunk to its black content (leaving a margin) and,
//    unless it is smaller than 5x7, passed to detect_lines1.
// Returns 0 for boxes that are ignored as dust, else the result of the last
// nested call.
int detect_lines2(pix p,int x0,int y0,int dx,int dy,int r){
int x,y,i,cs=env.cs,x2,y2,x3,y3,x4,y4,x5,y5,y6;
// shrink box
if(r>1000){ return -1;} // something is wrong
if(vvv)printf("\n r=%2d ",r);
// better look for widest h/v-gap
if(r<8){ // max. depth
// detect widest horizontal gap
y2=y3=y4=y5=y6=0; // position and thickness of gap, y6=num_gaps
for(y=4; y<dy-4; y++){
if( get_bw2(x0,x0+dx-1,y0+y,y0+y,p,cs,1)==0 ) { y4=y0+y;y5++; }
else { if(y5>y3){ y3=y5;y2=y4; }
// if(y5)printf("\n found y=%3d %3d %3d %3d",y4,y5,y2,y3);
if(y5)y6++; y5=0; }
}
// detect widest vertical gap and divide if there are lot of lines (v-crosses)
x2=x3=x4=x5=y5=0;// min. 3 lines
for(x=1; x<dx-1; x++){
y=num_cross(x0+x,x0+x,y0,y0+dy-1,p,cs);if(y>y5)y5=y;
if( y==0 ) { x4=x0+x;x5++; }
else { if(x5>x3){ x3=x5;x2=x4; } x5=0; }
}
i=((x3>2*y3 && dy>5*x3)?1:0); // good criteria? (KS idea)
if(vvv && (i || y2))printf(" divide at %s=%4d",((i)?"x":"y"),((i)?x2:y2));
// divide horizontally if v-gap is thicker than h-gap
// and length is larger 5*width
if(i){ detect_lines2(p,x0,y0,x2-x0+1,dy,r+1);
return detect_lines2(p,x2,y0,x0+dx-x2+1,dy,r+1); }
// divide vertically
i=((y2!=0 && dx>5*y3)?1:0);
if(i){ detect_lines2(p,x0,y0,dx,y2-y0+1,r+1);
return detect_lines2(p,x0,y2,dx,y0+dy-y2+1,r+1);
}
}
if(vvv)printf( " box detected at %4d %4d %4d %4d",x0,y0,dx,dy);
// remove border again!
for(x=0; x<dx; x++) if( get_bw2(x0+x,x0+x,y0,y0+dy-1,p,cs,1)==1 ) break;
if(x>0) { x0+=x-1;dx-=x-1; }
for(x=dx-1;x>0;x--) if( get_bw2(x0+x,x0+x,y0,y0+dy-1,p,cs,1)==1 ) break;
if(x<dx-1) dx=x+1;
for(y=0; y<dy; y++) if( get_bw2(x0,x0+dx-1,y0+y,y0+y,p,cs,1)==1 ) break;
if(y>0) { y0+=y-1;dy-=y-1; }
for(y=dy-1;y>0;y--) if( get_bw2(x0,x0+dx-1,y0+y,y0+y,p,cs,1)==1 ) break;
if(y<dy-1) dy=y+1;
if(dx<5 || dy<7) return 0; // do not care about dust
return detect_lines1(p,x0-1,y0-2,dx+2,dy+3,r+1);
// NOTE: everything below is unreachable because of the return above; it
// would register the whole box as a single line with estimated m1..m4.
i=lines.num; lines.num++;
lines.m1[i]=y0; lines.m2[i]=y0+5*dy/16;
lines.m3[i]=y0+12*dy/16; lines.m4[i]=y0+dy-1;
lines.x0[i]=x0; lines.x1[i]=x0+dx-1;
if(vvv)printf(" - line= %d",lines.num);
return 0;
}
// Test whether the character c occurs in cset. Only the first 10 characters
// of cset are examined. Returns 1 if found, else 0.
int strc(char c, char *cset){
  int k=0;
  while( k<10 && cset[k] ){
    if( cset[k]==c ) return 1;
    k++;
  }
  return 0;
}
void help(){
printf( " Optical Character Recognition gocr "Version"\n options:\n"
" -i name - image file (pnm,pgm,pbm,ppm,pcx)\n"
" -i - - read PNM from stdin (djpeg -pnm -gray a.jpg | gocr -i -)\n"
" -l num - grey level 0<160<=255\n"
" -d num - dust_size (remove all smaller clusters, 10=default)\n"
" -s num - spacewidth/dots (0 = autodetect)\n"
" -v num - verbose [summed]\n"
" 1 print more info\n"
" 2 list chapes of boxes (see -c)\n"
" 4 list pattern of boxes (see -c)\n"
" 8 print pattern after recognition\n"
" 16 print line infos\n"
" 32 debug outXX.pgm\n"
" -c string - list of chars (_ = not recognized chars)\n"
" -m num - operation modes, ~ = switch off\n"
" 2 use database (development)\n"
" 4 layout analysis (testing)\n"
" 8 ~ compare non recognized chars\n"
" 16 ~ divide overlapping chars\n"
" 32 ~ context correction\n"
" 64 char packing\n"
" examples: gocr -v 6 -v 32 -c _YV -i text1.pbm\n"
" djpeg -pnm -gray text.jpg | gocr -i -\n"
"\n");
exit(0);
}
// Load the character database into the boxd chain.
// Added in version v0.2.4 as an alternate engine that compares chars
// with database images.
//
// The index file db/db.lst is read line by line.  Each line starts with
// an image file name, followed by one separator (blank, tab, ',' or ';')
// and the character that the image shows.  The image is read from
// ./db/<name> with readpgm() and appended to boxd as a new box whose c
// member holds that character.
//
// Lines that are empty or that carry no character after the separator
// are skipped.
//
// Returns 0 on success, 1 if db/db.lst cannot be opened.
int load_db(){
  FILE *f1;
  char s1[80];                 // one line of db.lst
  char s2[5+80]="./db/";       // "./db/" + longest possible name + NUL
  int i,j,ii=0;                // ii = number of entries actually loaded
  if(vvv)printf("# load database ./db/ ... ");
  f1=fopen("db/db.lst","r");if(!f1){printf(" DB not found\n");return 1;}
  // Loop on the fgets() result instead of feof(): feof() only becomes
  // true after a failed read, which made the last line get processed twice.
  while(fgets(s1,sizeof(s1),f1)){
    j=strlen(s1);
    // drop the line terminator so that it is neither part of the name
    // nor mistaken for the character field
    while(j>0 && (s1[j-1]=='\n' || s1[j-1]=='\r')) s1[--j]=0;
    // copy the file name (everything up to the first separator)
    for(i=0;i<j && strc(s1[i]," \t,;")==0;i++)s2[5+i]=s1[i];
    s2[5+i]=0;                 // terminate, a previous name may be longer
    if(i==0 || i+1>=j) continue;   // no file name or no character field
    /* struct pix *pp=new struct pix(); gcc2.95.2 error */
    pix *pp=new pix();
    readpgm(s2,pp,0*vvv);
    struct box *box3=new struct box();
    box3->x0=1; box3->x1=pp->x-3;  // white border 1 pixel width
    box3->y0=1; box3->y1=pp->y-3;
    box3->x=1; box3->y=1;
    box3->dots=0; box3->c=s1[i+1];   // character following the separator
    box3->next=box3->pre=NULL;
    box3->num=0; box3->line=-1;
    box3->m1=0;box3->m2=0;box3->m3=0;box3->m4=0;
    box3->p=pp;
    box_app(&boxd,box3);  // append to list
    ii++;
  }
  fclose(f1);if(vvv)printf(" %d chars loaded\n",ii);
  return 0;
}
char ocr_db(struct box *box1){
int d,dist=1000; char c='_'; struct box *box2,*box3;
for(box3=box2=boxd;box2;box2=box2->next){
// do preselect!!! distance() slowly
d=distance2(*(box2->p),box2,*(box1->p),box1,env.cs);
if(d<dist){ dist=d; c=box2->c; box3=box2; }
}
if(vvv)printf("\n# db dist=%4d c=%c",dist,c);
if(dist>30) c='_';
else {
vvv|=1024; // verbose output for tests
// d=distance2(*(box3->p),box3,*(box1->p),box1,env.cs);
vvv&=~1024;
}
return c;
}
// -------------------------------------------------------------
// ------ MAIN
// -------------------------------------------------------------
int main(int argn, char *argv[]){
int i,x,y,cs=0,spc=0,plothisto=0,mo=0,dust_size=10;
int sumX=0,sumY=0; // average size => sumX/numC,sumY/numC
int numC=0;
pix p,p2;
env.cs=160; env.avX=5; env.avY=8; env.p=NULL; // default values
char *inam="text.pgm",cc=0,*lc="_",*s1;
setvbuf(stdout,(char *)NULL,_IONBF,0); // not buffered
for(i=1;i<argn;i++){
if( argv[i][0]=='-' ) { cc=argv[i][1]; }
s1="";if(i+1<argn)s1=argv[i+1];
switch ( cc ) {
case 'h' : help(); break;
case 'i' : inam = s1;i++; break;
case 'c' : lc = s1;i++; break;
case 'd' : dust_size = atoi(s1);i++; break;
case 'l' : cs = atoi(s1);i++; break;
case 's' : spc = atoi(s1);i++; break;
case 'v' : vvv |= atoi(s1);i++; break;
case 'm' : mo |= atoi(s1);i++; break;
default: printf("# unknown option use -h for help\n");
}
cc=0;
}
if(!cs) cs =128+32; env.cs=cs;
if(vvv)setvbuf(stdout,NULL,_IONBF,0);
if(vvv)
printf("# options are: -i %s -l %d -s %d -v %d -c %s -m %d\n",
inam,cs,spc,vvv,lc,mo);
// ----- read picture
for(i=0;inam[i]!=0 && i<200;i++); // string-length
if(i>3 && inam[i-4]=='.'
&& inam[i-3]=='p'
&& inam[i-2]=='c'
&& inam[i-1]=='x') { readpcx(inam,&p,vvv); }
else { readpgm(inam,&p,vvv); }
env.p=&p;
{
p2.p = new Uchar[ p.y*p.x ]; // buffer
// p2.p = (Uchar *)malloc(dy*dx);
assert(p2.p);
copybox(p,0,0,p.x,p.y,&p2,p.x*p.y);
}
// ----- count colors ------ create histogram -------
Uint col[256];
makehisto(p,col,plothisto);
if(mo&2) load_db();
// this is first step for reorganize the PG
// ---- look for letters, put rectangular frames arround letters
// letter = connected points near color F
// should be used by dust removing (faster) and line detection!
// ---- 0..cs = black letters, last change = Mai99
{
clr_bits(p,0,p.x-1,0,p.y-1);
if(vvv)printf("# scanning boxes");
for(y=0; y<p.y;y++) // better: rekursiv nn-pixel suchen
for(x=0; x<p.x;x++)
{
if( marked(p,x,y) ) continue; // marked
if( pixel (p,x,y)>=cs ) continue; // no pixel
int x0=x,x1=x,y0=y,y1=y,dots=0; // box
frame_nn(p,x,y,x0,x1,y0,y1,cs,AT); // frame and mark nn-dots
p.p[x+y*p.x]|=M1; // mark startpoint
numC++;
// --- insert in list
struct box *box3=new struct box();
box3->x0=x0; box3->x1=x1;
box3->y0=y0; box3->y1=y1;
box3->x=x; box3->y=y;
box3->dots=dots; box3->c=(((y1-y0+1)*(x1-x0+1)>=MaxBox)?'@':'_');
box3->next=box3->pre=NULL;
box3->num=numC;
box3->line=0; // not used here
box3->m1=0; box3->m2=0; box3->m3=0; box3->m4=0;
box3->p=&p;
box_app(&box1,box3); // append to list
}
if(numC){ if(vvv)printf(" %d\n",numC); }
}
/* ---- remove dust ---------------------------------
What is dust? I think, this is a very small pixel cluster without
neighbours. Of course not all dust clusters can be detected correct.
This feature should be possible to switch off via option.
-> may be, all clusters should be stored here?
speed is very slow, I know, but I am happy that it is working well
*/
// new dust removing
int ds,df=100000,dd; // dust found
i=df-1; // dustsize should be revers proportional to its number
dd=dust_size/8+1; // step
struct box *box3;
for( ds=1;i<df && ds<dust_size && i>0;ds+=dd )
{ int x,y,j;df=i;i=0;
if(vvv){ printf("# searching dust size<%2d",ds+1);
if(ds>2)printf(" and remove detected dust");
printf(" ...");
}
for(i=0,box2=box1;box2;box2=box2->next){
int x0=box2->x0,x1=box2->x1,y0=box2->y0,y1=box2->y1; // box
j=0; /* count pixel */
for(x=x0;x<=x1;x++)
for(y=y0;y<=y1;y++){
if( pixel(p,x,y)<cs ){ j++; }
}
box2->dots=j; // temporaly used
}
for(i=0,box2=box1;box2;box2=box2->next){
int x0=box2->x0,x1=box2->x1,y0=box2->y0,y1=box2->y1; // box
j=box2->dots;
if(j<=ds-dd) /* remove this */
{ numC--;
for(x=x0;x<=x1;x++)
for(y=y0;y<=y1;y++){ put(p,x,y,0,255&~7); }
box3=box2->pre;box_del(box2);delete box2;box2=box3;
if(!box2)box2=box1; // this is hopefully right
continue;
}
if(j<=ds ) i++; /* count as dust particle */
}
if(vvv)printf(" %3d cluster detected\n",i);
}
if(vvv)printf("# remaining boxes %d\n",numC);
lines.dy=0; lines.num=0; // meanvalue of rise
// ----- detect longest lines, is it horizontal? ---------------
{ int dy=0,j,y2,k;
if(vvv)printf( "# detect longest line"); // or read from outside???
// most black/white changes ???
for(y2=i=y=0;y<p.y;y+=4){
j=num_cross(0,p.x-1,y,y,p,cs); if(j>i){ i=j; y2=y; }
}
if(vvv)printf(" - at y=%d crosses=%3d",y2,i);
if(y2>100) // better only on long lines
for(k=0;k<30;k++) // change angle, good algo???
for(y=y2-15;y<y2+15;y++){
j=num_cross(0,p.x-1,y,y+k,p,cs); if(j>i){ i=j; y2=y; dy= k-1; }
j=num_cross(0,p.x-1,y,y-k,p,cs); if(j>i){ i=j; y2=y; dy=-k+1; }
}
lines.dy=dy;
if(vvv)printf(" - at y=%d crosses=%3d dy=%d\n",y2,i,dy);
{ dy=lines.dy;
if(vvv&32){
for(y=0;y<p.y;y++)for(x=0;x<p.x;x++)p2.p[x+p.x*y]=p.p[x+p.x*y]&(192);
for(x=0;x<p2.x;x++)if((x&35)>32)put(p2,x,y2+dy*x/p2.x,255,32);
// writebmp("out10.bmp",p2,vvv); // colored should be better
}
}
}
// ----- detect lines ---------------
if(vvv)printf( "# scanning lines "); // or read from outside???
if(mo&4) detect_lines2(p,0,0,p.x,p.y,0); // later replaced by better algo
else detect_lines1(p,0,0,p.x,p.y,0); // old algo
{ int dy=lines.dy;
if(vvv&32){
for(i=0;i<lines.num;i++){ // mark lines
for(x=lines.x0[i];x<lines.x1[i];x++){
y=lines.m1[i];if((x& 7)==4)put(p2,x,y+dy*x/p2.x,255,32);
y=lines.m2[i];if((x& 3)==2)put(p2,x,y+dy*x/p2.x,255,32);
y=lines.m3[i];if((x& 1)==1)put(p2,x,y+dy*x/p2.x,255,32);
y=lines.m4[i];if((x& 7)==4)put(p2,x,y+dy*x/p2.x,255,32);
}
}
// writebmp("out10.bmp",p2,vvv); // colored should be better
}
}
// erase box list, temporarely (later sorting and gluing)
for(;box1;){
box2=box1; box_del(box2); delete box2;
}
numC=0;
// ---- clear last 3 bits (flag)
clr_bits(p,0,p.x-1,0,p.y-1);
// ---- look for letters, put rectangular frames arround letters
// letter = connected points near color F
if(vvv)printf("\n# scanning boxes");
for(i=0;i<lines.num;i++) // nur erkannte Zeilen
{
for(x=lines.x0[i]; x<=lines.x1[i];x++)
for(y=lines.m3[i];2*y>lines.m2[i]+lines.m1[i];y--) // better: rekursiv nn-pixel suchen
{
int m1=lines.m1[i]+lines.dy*x/p.x;
int m2=lines.m2[i]+lines.dy*x/p.x;
int m3=lines.m3[i]+lines.dy*x/p.x;
int m4=lines.m4[i]+lines.dy*x/p.x;
if( marked(p,x,y) ) continue; // marked
if( pixel (p,x,y)>=cs ) continue; // no pixel
int x0=x,x1=x,y0=y,y1=y,dots=0; // box
frame_nn(p,x,y,x0,x1,y0,y1,cs,AT); // frame and mark nn-dots
p.p[x+y*p.x]|=M1; // mark startpoint
if(2*y0>m2+m3 || y1-y0<5){ // .-, test :=;!? better explicite dot-proof-func
int x,y;
for(x=x0;x<x1+2;x++)for(y=m2;y<y0;y++)
if(!marked(p,x,y))if(pixel(p,x,y)<cs && pixel(p,x+(x1-x0)/8,y)<cs)
frame_nn(p,x,y,x0,x1,y0,y1,cs,AT); // expand frame box
}
if(2*y0>m1+m2){ // aeiou test umlaut \"a\"o\"ui
// shrink white frame
int x,y,x2,y2,y3=y0;y2=m1;x2=x1+1;
if(x2-x0>4 && 2*(x1-x0)>y1-y0) x2=x1-1;
if(y1-y0>4 && y0-(y1-y0)/2>y2) y2=y0-(y1-y0)/2;
for(x=x0;x<x2;x++)for(y=y2;y<y3;y++)
if(!marked(p,x,y))if(pixel(p,x,y)<cs && pixel(p,x+(x1-x0)/8,y)<cs)
{ frame_nn(p,x,y,x0,x1,y0,y1,cs,AT);dots++; } // expand frame box
// printf(" dots=%d\n",dots);out_b(p,x0,y0,x1-x0+1,y1-y0+1,cs);
}
if(2*y0<m1+m2 && 2*y1<m3+m4)
if( y1<m3-(y1-y0)/16 ){ // test !?
int x,y,y2;y2=m3;if(y2>y1+(y1-y0)/2)y2=y1+(y1-y0)/2;
for(x=x0;x<x1-2;x++)for(y=y1;y<y2;y++)
if(!marked(p,x,y))if(pixel(p,x,y)<cs)
frame_nn(p,x,y,x0,x1,y0,y1,cs,AT); // expand frame box
}
if( (y1-y0+1)*(x1-x0+1)<5 ) // points arround ???
if( 2*y1<m3+m2) continue; // dust
numC++; sumY+=y1-y0+1; sumX+=x1-x0+1;
// --- insert in list
#if 0
struct box *box3=new struct box
((struct box){x0,x1,y0,y1,x,y,dots,(struct box*)NULL,(struct box*)NULL,
'_',numC,i,
lines.m1[i]+lines.dy*x/p.x,
lines.m2[i]+lines.dy*x/p.x,
lines.m3[i]+lines.dy*x/p.x,
lines.m4[i]+lines.dy*x/p.x,&p});
#else
struct box *box3=new struct box();
box3->x0=x0; box3->x1=x1;
box3->y0=y0; box3->y1=y1;
box3->x=x; box3->y=y;
box3->dots=dots; box3->c=(((y1-y0+1)*(x1-x0+1)>=MaxBox)?'@':'_');
box3->next=box3->pre=NULL;
box3->num=numC; box3->line=i;
box3->m1=lines.m1[i]+lines.dy*x/p.x;
box3->m2=lines.m2[i]+lines.dy*x/p.x;
box3->m3=lines.m3[i]+lines.dy*x/p.x;
box3->m4=lines.m4[i]+lines.dy*x/p.x;
box3->p=&p;
#endif
if(box3->y1-box3->y0 >= box3->m3-box3->m2){ // letter
int dx=0; // correction of obviously wrong lines.dy
if(box3->y1 > box3->m4) dx=box3->y1 - box3->m4;
if(box3->y1 < box3->m3) dx=box3->y1 - box3->m3;
if(box3->y0 > box3->m2) dx=box3->y0 - box3->m2;
if(box3->y0 < box3->m1) dx=box3->y0 - box3->m1;
box3->m1+=dx; box3->m2+=dx;
box3->m3+=dx; box3->m4+=dx;
}
box_app(&box1,box3); // append to list
#if 0
int y2,y3; // i-dot but no "Vo" overlap
y3=((y1-y0>x1-x0)?y1-y0:x1-x0);
y2=lines.m1[i]+lines.dy*x/p.x;if(y0-y2>y3/4)y2=y0-y3/4;if(y3<5)y2=y0-7;
for(y=y2;y< y0;y++)if(get_bw(x0+(x1-x0)/4,x1-1,y,y,p,cs,1)){ box3->y0=y;break; }
y2=lines.m3[i]+lines.dy*x/p.x;if(y2-y1>y3/4)y2=y1+y3/4;if(y3<5)y2=y1+5;
for(y=y2;y>=y1;y--)if(get_bw(x0+1,x1-1,y,y,p,cs,1)){ box3->y1=y;break; }
x=x1+1; break;
#endif
}
}
if(numC){ env.avY=sumY/numC; env.avX=sumX/numC;
if(vvv)printf(" %d - average X Y %d %d\n",numC,sumX/numC,sumY/numC);
}
// ---- analyse boxes, find pictures (do this first!!!)
if(vvv)printf("# detect pictures");
for(i=0,box2=box1;box2;box2=box2->next){
int x0=box2->x0,x1=box2->x1,y0=box2->y0,y1=box2->y1; // box
if( x1-x0+1>10*env.avX || y1-y0+1>10*env.avY ) /* large picture */
{ sumX-=x1-x0+1; sumY-=y1-y0+1; numC--; box2->c='@'; }
if( box2->c=='@' ) i++;
if( 4*(y1-y0+1)<env.avY || y1-y0<2) // dots .,-_ etc.
{ sumX-=x1-x0+1; sumY-=y1-y0+1; numC--; } // better after pictures!
}
if(!numC){ printf("\n no chars found - stopped\n");exit(1); }
env.avY=sumY/numC; env.avX=sumX/numC;
if ( spc==0 ) spc = (env.avX+20) / 4;
if(vvv)printf(" %d - boxes %d",i,numC);
if(vvv)printf(" - new average X Y %d %d spc %d\n",env.avX,env.avY,spc);
// ---- analyse boxes, compare chars, compress picture ------------
// ToDo: - error-correction only on large chars!
if((mo&64)){
if(vvv)printf("# packing");
for(i=0,box2=box1;box2;box2=box2->next,i++); // count boxes
for(box2=box1;box2;box2=box2->next){
struct box *box3,*box4=box2,*box5;
int dist=1000,n1; // 100% maximum
int dx = box2->x1 - box2->x0 + 1;
if(vvv)printf("\r# packing %5d",i);
if( dx>3 )
for(box3=box2->next;box3;box3=box3->next)if(box2->num!=box3->num){
int d=distance(p,box2,p,box3,cs);
if ( d<dist ) { dist=d; box4=box3; } // best fit
if ( d<5 ){ // good limit = 5% ???
i--;n1=box3->num; // set all num==box2.num to box2.num
for(box5=box1;box5;box5=box5->next)if(box5!=box2)
if( box5->num==n1 ) box5->num=box2->num;
// out_b2(p,box2->x0,box2->y0,dx,dy,cs,box5->x0,box5->y0);
// printf(" dist=%d\n",d);
}
}
// nearest dist to box2 has box4
// out_b2(p,box2->x0,box2->y0,dx,dy,cs,box4->x0,box4->y0);
// printf(" dist=%d\n",dist);
}
int k=0;
if(vvv)printf(" %d different chars",i);
for(box2=box1,i=0;box2;box2=box2->next){
struct box *box3,*box4;
int j,dist;
for(box3=box1;box3!=box2 && box3!=NULL;box3=box3->next)
if(box3->num==box2->num)break;
if(box3!=box2 && box3!=NULL)continue;
i++;
// count number of same chars
dist=0;box4=box2;
for(box3=box2,j=0;box3;box3=box3->next)if(box3->num==box2->num){
j++;
int d=distance(p,box2,p,box3,cs);
if ( d>dist ) { dist=d; box4=box3; } // worst fit
}
if(vvv&8){
out_b2(p,box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1,cs,
box4->x0,box4->y0);
printf(" no %d char %4d %5d times maxdist=%d\n",i,box2->num,j,dist);
}
// calculate mean-char (error-correction)
// ToDo: calculate maxdist in group
k+=j;
// if(j>1)
// out_b(p,box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1,cs);
if(vvv&8)
printf(" no %d char %4d %5d times sum=%d\n",i,box2->num,j,k);
}
if(vvv)printf(" ok\n");
}
// ---- analyse boxes, find chars ---------------------------------
if(vvv)printf("# step 1: char recognition");
for(i=0,box2=box1;box2;box2=box2->next){
struct box *box3;
int x0=box2->x0,x1=box2->x1,
y0=box2->y0,y1=box2->y1; // box
char cc=box2->c; // only makes sence on gray-pictures!
if(cc=='_') cc=whatletter(box2,cs ); // 90%
// if(cc=='_') cc=whatletter(box2,cs-20); // 60%
if(mo&2) if(cc=='_') cc=ocr_db(box2);
if(vvv&8) { printf("%c\n",cc);
out_b(p,x0,y0,x1-x0+1,y1-y0+1,cs); }
box2->c=cc; if(cc=='_') // copy char
for(box3=box1;box3;box3=box3->next)if(box3->num==box2->num)box3->c=cc;
if( cc=='_' ) i++;
}
if(vvv)printf(", %d chars unidentified\n",i);
// ----------- write out20.pgm -----------
if(vvv&32){
for(box2=box1;box2;box2=box2->next){
for(y=box2->y0;y<=box2->y1;y++)
for(x=box2->x0;x<=box2->x1;x++){
if( x==box2->x0 || x==box2->x1
|| y==box2->y0 || y==box2->y1 ) p2.p[x+y*p.x]|=32; // box
if( x> box2->x0 && x< box2->x1
&& y> box2->y0 && y< box2->y1 )
if(box2->c=='_' && (1&(x+y))!=0) p2.p[x+y*p.x]|=16; // box
}
}
// writepgm("out20.pgm",p2);
writebmp("out20.bmp",p2,vvv); // colored should be better
// for(y=0;y<p.y;y++)for(x=0;x<p.x;x++)p2.p[x+p.x*y]=p.p[x+p.x*y];
}
// ---- -------------------------------
if(vvv)printf("# step 2: try to compare unknown with known chars");
if(!(mo&8))
for(i=0,box2=box1;box2;box2=box2->next)if(box2->c=='_'){
struct box *box3,*box4=box1; int dist=1000,d; // 100% maximum
char bc='_'; // best fit char
for(box3=box1;box3;box3=box3->next)if(box3!=box2)if(box3->c!='_'){
d=distance(p,box2,p,box3,cs);
if ( d<dist ) { dist=d; bc=box3->c; box4=box3; }
}
if(dist<10) { box2->c=bc;i++; } // limit as option???
// => better max distance('e','e') ???
if( dist<50 && (vvv&7)){ // only for debugging
printf("\n# L%02d best fit was %c %3d%% %d",box2->line,bc,dist,i);
out_x2(box2,box4);
}
}
if(vvv)printf(" - found %d\n",i);
// ---- divide overlapping chars which !in_str(c,"_,.:;");
// completely changed at Mar2000
// division if dots>0 does not work proper! ???
//
// what about glued be?
// what about rekursiv division?
if(vvv)printf("# step 3: try to divide unknown chars");
if(!(mo&16))
for(box2=box1;box2;box2=box2->next)
if(box2->c=='_' && box2->x1-box2->x0>5 && box2->y1-box2->y0>4){
struct box *box3;
struct box boxa,boxb,boxc; // temporarely needed boxes
char c1,c2,c3,*s1="_.,'!;?:-=()"; // not accepted chars
int k2,x0,x1,y0,y1,x=0,x2=0;c1=c2=c3='_';
x0=box2->x0; x1=box2->x1;
y0=box2->y0; y1=box2->y1;
// one vertical line can not be two glued chars
if( num_cross(x0,x1,(y1+y0)/2,(y1+y0)/2,p,cs)>1 )
{ // doublet = 2 letters
int i,ii,j,k,m,m1,m2,m3,i1,i2,i3,dx=(x1-x0)/64,dy=(y1-y0+1);
if(vvv&32)out_b(p,x0,y0,x1-x0+1,y1-y0+1,cs);
m1=m2=m3=0; i1=i2=i3=0; // searching minima m1 m2 m3
// it would be better if testing is only if most right and left char
// is has no horizontal gap (below m2) ex: be
for(i=0;i<(x1-x0)/2-2;i++) // rm <=> nn .@ mask? for better sorting
for(ii=-1;ii<2;ii+=((i)?2:4)){
k2=0;
for(m=-i/8,j=y0;j<=y1;j++){
k=((pixel(p,(x1+x0)/2+ii*i,j)<cs)?0:1); m+=4*k; // using gray ???
if(!k) m+=((pixel(p,(x1+x0)/2+ii*i-1,j)<cs)?0:2);
if(!k) m+=((pixel(p,(x1+x0)/2+ii*i+1,j)<cs)?0:2);
if(!k) m+=((pixel(p,(x1+x0)/2+ii*i-2,j)<cs)?0:1);
if(!k) m+=((pixel(p,(x1+x0)/2+ii*i+2,j)<cs)?0:1);
if(k!=k2) m-=dy/2; k2=k; // many b/w changes are bad!
}
// replace one of 3 maxima (nearest or lowest
if( abs(i3-ii*i)<2+dx ){ if(m>m3) { m3=m;i3= ii*i; } } else
if( abs(i2-ii*i)<2+dx ){ if(m>m2) { m2=m;i2= ii*i; } } else
if( abs(i1-ii*i)<2+dx ){ if(m>m1) { m1=m;i1= ii*i; } } else
{ if(m>m3) { m3=m;i3= ii*i; } }
// sort it
if( m3>m2 ){ k=m2;m2=m3;m3=k; k=i2;i2=i3;i3=k; }
if( m2>m1 ){ k=m1;m1=m2;m2=k; k=i1;i1=i2;i2=k; }
if( m3>m2 ){ k=m2;m2=m3;m3=k; k=i2;i2=i3;i3=k; }
}
i1+=(x1+x0)/2;
i2+=(x1+x0)/2;
i3+=(x1+x0)/2;
if(vvv&32)printf(" i1,i2,i3 m123= %d %d %d %d %d %d\n",i1-x0,i2-x0,i3-x0,m1,m2,m3);
// removing ->dots if dot only above one char !!! ??? not implemented
if( 2*m1>y1-y0 ) // minimum of white pixels should be found
{
boxa=*box2;boxb=*box2,boxc=*box2; // copy contents
boxa.next=&boxb;boxb.pre=&boxa; // new pointers
x=i1;
boxa.x=x0; boxa.y=y0;boxa.x1=x;
boxb.x=x+1;boxb.y=y0;boxb.x0=x+1;
c1=whatletter(&boxa,cs); // unknown startpos!
c2=whatletter(&boxb,cs);
// boxa..c changed!!! dots should be modified!!!
if( in_str(c1,s1) || in_str(c2,s1) ) x=0;
if(vvv&32)printf(" x c12 =%d %c %c\n",x-x0,c1,c2);
}
if( 2*m2>y1-y0 )
if(!x){
boxa=*box2;boxb=*box2,boxc=*box2; // copy contents
boxa.next=&boxb;boxb.pre=&boxa; // new pointers
x=i2;
boxa.x=x0; boxa.y=y0;boxa.x1=x;
boxb.x=x+1;boxb.y=y0;boxb.x0=x+1;
c1=whatletter(&boxa,cs); // unknown startpos!
c2=whatletter(&boxb,cs);
if( in_str(c1,s1) || in_str(c2,s1) ) x=0;
if(vvv&32)printf(" x c12 =%d %c %c\n",x-x0,c1,c2);
}
if( 2*m3>y1-y0 )
if(!x){
boxa=*box2;boxb=*box2,boxc=*box2; // copy contents
boxa.next=&boxb;boxb.pre=&boxa; // new pointers
x=i3;
boxa.x=x0; boxa.y=y0;boxa.x1=x;
boxb.x=x+1;boxb.y=y0;boxb.x0=x+1;
c1=whatletter(&boxa,cs); // unknown startpos!
c2=whatletter(&boxb,cs);
if( in_str(c1,s1) || in_str(c2,s1) ) x=0;
if(vvv&32)printf(" x c12 =%d %c %c\n",x-x0,c1,c2);
}
if( 2*m2>y1-y0 )
if(!x){
boxa=*box2;boxb=*box2,boxc=*box2; // copy contents
boxa.next=&boxb;boxb.pre=&boxa; // new pointers
x2=i2; x=i1; if(x>x2){ k=x;x=x2;x2=k; }
boxa.x=x0; boxa.y=y0;boxa.x1=x;
boxb.x=x+1; boxb.y=y0;boxb.x0=x+1;boxb.x1=x2;
boxc.x=x2+1;boxc.y=y0;boxc.x0=x2+1;
c1=whatletter(&boxa,cs); // unknown startpos!
c2=whatletter(&boxb,cs);
c3=whatletter(&boxc,cs);
if( in_str(c1,s1) || in_str(c2,s1) || in_str(c3,s1) ) x=0;
if(vvv&32)printf(" x c123=%d %c %c %c\n",x-x0,c1,c2,c3);
}
if( 2*m3>y1-y0 )
if(!x){
boxa=*box2;boxb=*box2,boxc=*box2; // copy contents
boxa.next=&boxb;boxb.pre=&boxa; // new pointers
x2=i3; x=i1; if(x>x2){ k=x;x=x2;x2=k; }
boxa.x=x0; boxa.y=y0;boxa.x1=x;
boxb.x=x+1; boxb.y=y0;boxb.x0=x+1;boxb.x1=x2;
boxc.x=x2+1;boxc.y=y0;boxc.x0=x2+1;
c1=whatletter(&boxa,cs); // unknown startpos!
c2=whatletter(&boxb,cs);
c3=whatletter(&boxc,cs);
if( in_str(c1,s1) || in_str(c2,s1) || in_str(c3,s1) ) x=0;
if(vvv&32)printf(" x c123=%d %c %c %c\n",x-x0,c1,c2,c3);
}
if( 2*m3>y1-y0 )
if(!x){
boxa=*box2;boxb=*box2,boxc=*box2; // copy contents
boxa.next=&boxb;boxb.pre=&boxa; // new pointers
x2=i3; x=i2; if(x>x2){ k=x;x=x2;x2=k; }
boxa.x=x0; boxa.y=y0;boxa.x1=x;
boxb.x=x+1; boxb.y=y0;boxb.x0=x+1;boxb.x1=x2;
boxc.x=x2+1;boxc.y=y0;boxc.x0=x2+1;
c1=whatletter(&boxa,cs); // unknown startpos!
c2=whatletter(&boxb,cs);
c3=whatletter(&boxc,cs);
if( in_str(c1,s1) || in_str(c2,s1) || in_str(c3,s1) ) x=0;
if(vvv&32)printf(" x c123=%d %c %c %c\n",x-x0,c1,c2,c3);
}
}
if(x>x0 && x<x1){ // seperate first
box2->y0=boxb.y0;
box2->y1=boxb.y1;
// --- insert ind list
box3=new struct box((struct box)boxa); // *box2=>boxa,boxb 024a4
box3->x1=x; box3->c=c1;
box2->x0=x+1; box2->c=c2;
box_ins_before(box2,box3); numC++;
if(x2>x && x2<x1){
// --- insert in list
box3=new struct box((struct box)boxb);
box3->x1=x2; box3->c=c2;
box2->x0=x2+1; box2->c=c3;
box_ins_before(box2,box3); numC++;
}
continue;
}
}
if(vvv)printf(", numC %d\n",numC);
// ---- list output ---- for debugging
if(vvv&6){
for(i=0,box2=box1;box2;box2=box2->next,i++)
if( strc(box2->c,lc) ){
printf("# list chape %3d x=%4d %4d d=%3d %3d h=%d o=%d dots=%d %c\n",
i,box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1,
num_hole(box2->x0,box2->x1,box2->y0,box2->y1,p,cs),
num_obj (box2->x0,box2->x1,box2->y0,box2->y1,p,cs),
box2->dots, box2->c );
if( vvv&4 ){
// out_b(p,box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1,cs);
out_x(box2);
}
}
}
// ---- insert spaces ----
for(i=0,box2=box1;box2;box2=box2->next,i++){
char cc=0;
if(box2->pre){
if(box2->line!=box2->pre->line )
if(lines.m3[box2->line] > lines.m4[box2->pre->line]) cc='\n'; // NL
else cc=' ';
if(box2->x0 > box2->pre->x1)
if(box2->x0 - box2->pre->x1 > spc) cc=' '; // SPC
if(cc){
#if 0
struct box *box3=new struct box ((struct box){
box2->pre->x1+2,box2->x0-2,box2->y0,box2->y1,
box2->x0-1,box2->y0,0,(struct box*)NULL,(struct box*)NULL,
cc,0,box2->pre->line,0,0,0,0,&p});
#else
struct box *box3=new struct box();
box3->x0=box2->pre->x1+2; box3->x1=box2->x0-2;
box3->y0=box2->y0; box3->y1=box2->y1;
box3->x =box2->x0-1; box3->y=box2->y0;
box3->dots=0; box3->c=cc;
box3->next=box3->pre=NULL;
box3->num=0; box3->line=box2->pre->line;
box3->m1=0; box3->m2=0; box3->m3=0; box3->m4=0;
box3->p=&p;
#endif
box_ins_before(box2,box3);
}
}
}
// ---- proof Il1 by context view ----
// context: seperator, number, vokal, nonvokal, upper char ????
if(vvv)printf("# step 4: context correction Il1 0O");
if(!(mo&32))
for(i=0,box2=box1->next;box2;box2=box2->next,i++){
static char // *l_vokal="aeiou", *l_Vokal="AEIOU",
*l_nonvo="bcdfghjklmnpqrstvwxyz",
*l_small="abcdefghijklmnopqrstuvwxyz",
*l_digit="0123456789";
if( in_str(box2->c,"Il1|") && box2->next && box2->pre ){ // tall chars
// if( in_str(box2->pre->c," \n") // SPC
// && in_str(box2->next->c," \n") ) box2->c='I'; else // bad idea! I have ...
if( in_str(box2->pre->c," \n") // SPC
&& in_str(box2->next->c,l_nonvo) ) box2->c='I'; else
if( in_str(box2->pre->c, l_small) ) box2->c='l'; else
if( in_str(box2->pre->c, l_digit)
|| in_str(box2->next->c,l_digit) ) box2->c='1';
}
if( in_str(box2->c,"O0") && box2->next && box2->pre ){
if( in_str(box2->pre->c," \n") // SPC
&& in_str(box2->next->c,l_nonvo) ) box2->c='O'; else
if( in_str(box2->pre->c, l_digit)
|| in_str(box2->next->c,l_digit) ) box2->c='0';
}
if(box2->pre->c==' ' && box2->c=='.'){
box_del(box2->pre); // memory leak ???
}
}
if(vvv)printf("\n");
// ---- ASCII output
for(i=0,box2=box1;box2;box2=box2->next,i++){
printf("%c",box2->c);
if(box2->c==' ') // fill large gaps with spaces
for(i=(box2->x1-box2->x0)/(2*env.avX+1);i>0;i--)printf(" ");
}
printf("\n");
// ---- frame-size-histogramm
// ---- (my own defined) distance between letters
// ---- write internal picture of textsite
// ----------- write out30.pgm ----------- how to mark mark tolerance ???
if(vvv&32){
for(box2=box1;box2;box2=box2->next){
for(y=box2->y0;y<=box2->y1;y++)
for(x=box2->x0;x<=box2->x1;x++){
if( box2->c!=' ' && box2->c!='\n' )
if( x==box2->x0 || x==box2->x1
|| y==box2->y0 || y==box2->y1 ) p2.p[x+y*p.x]|=32; // box
if( x> box2->x0 && x< box2->x1
&& y> box2->y0 && y< box2->y1 )
if(box2->c=='_') p2.p[x+y*p.x]|=16; // box
}
}
// writepgm("out30.pgm",p2);
writebmp("out30.bmp",p2,vvv); // colored should be better
// for(y=0;y<p.y;y++)for(x=0;x<p.x;x++)p2.p[x+p.x*y]=p.p[x+p.x*y];
}
delete p.p;
delete p2.p;
while(box1) { box2=box1->next; delete box1; box1=box2; }
}